*-------------------------------------------------------------------------------
* Description: Generate Population in the largest city as % of urban population
*-------------------------------------------------------------------------------

********************************************************************************
**# Step 1: Load population in the largest city (% of urban population)
/*******************************************************************************
Source: World Bank WDI + GJV2016
Notes:  Take 0s as missing values
        Data accessed 12.03.2020
*******************************************************************************/

* Import data
import excel "raw_datasets/pop_largest_city.xlsx", sheet("Data") firstrow clear

* Replace 0s and ".." entries with missings.
* import excel picks each column's storage type from its cells, so a year
* column can arrive as numeric instead of string. Comparing a numeric column
* with a string literal aborts with a type-mismatch error, hence the branch.
quietly {
	foreach var of varlist y1960-y2019 {
		capture confirm string variable `var'
		if _rc == 0 {
			// String column: blank the ".." and "0" entries before destring
			replace `var' = "" if `var' == ".." | `var' == "0"
		}
		else {
			// Column already numeric: only zeros need to be set to missing
			replace `var' = . if `var' == 0
		}
	}
	destring y*, replace
}

* Rename for consistency
rename CountryCode code_wb

* Use the 2019 column as the 2020 observation (y2019 is renamed, so no
* separate 2019 year exists after the reshape)
rename y2019 y2020

* Reshape to long format: one row per country code and year
reshape long y, i(code_wb) j(year)

* Rename for consistency
rename y pop_largest_city

* Keep only desired countries: rows whose code is not in ccode are dropped.
* NOTE(review): codes found only in ccode (_merge == 2) stay in the data as a
* row with missing year, so later "year == ..." replacements cannot create
* values for them. Confirm every hard-coded country exists in the raw file.
merge m:1 code_wb using "processed_datasets/ccode", keepusing(country_wb)
drop if _merge == 1
drop _merge country_wb CountryName SeriesName SeriesCode

********************************************************************************
**# Step 2.1: Missing Data - Fill with linear interpolation
********************************************************************************

* Within each country, fill missing values that lie between observed years
* by linear interpolation over year (ipolate without epolate does not
* extrapolate beyond the observed range)
bysort code_wb (year): ipolate pop_largest_city year, generate(ipol_tmp)
replace pop_largest_city = ipol_tmp if pop_largest_city == .
drop ipol_tmp

********************************************************************************
**# Step 2.2: Missing Data - Re-creating Former Sudan
/*******************************************************************************
We re-create Former Sudan by combining data from South Sudan and Sudan.
From 2012 onwards we use WUP data on cities of over 300K inhabitants and on
total urban population for 2015 and 2020, and fill the remaining years by
interpolation. The largest city today in the territory of Former Sudan is
Al-Khartum (Khartoum) in Sudan, so we use this city to estimate urban primacy.
*******************************************************************************/

* Creating Unified Sudan
* For SDN, every value after 2011 is discarded; only 2015 and 2020 receive
* the hard-coded figures, the other years stay missing at this point.
replace pop_largest_city = cond(year == 2015, 33.4324684, ///
	cond(year == 2020, 34.4611633, .)) if code_wb == "SDN" & year > 2011

********************************************************************************
**# Step 2.3: Missing Data - Fill rest of missing values
/*******************************************************************************
We use GJV2016 for pre-2010 missings and then alternative sources for other
country years. Finally we use linear interpolation
*******************************************************************************/

* Hard-coded values per country. Each list holds the values assigned to
* 1960, 1970, 1980, 1990, 2000, 2010 and 2020, in that order.

* Bahamas - Source: WUP (Capital City Population)
local anchors_BHS 100 90.45056 88.26517 84.53392 86.67214 87.24001 84.36287

* Belize - Source: Citypopulation.de (Estimate)
local anchors_BLZ 65.4 62.8 55.9 48.8 40.9 38.1 35.6

* Bhutan - Source: Citypopulation.de (Census 2017)
local anchors_BTN 35 35 34.5 29.5 34.5 31.2 32.4

* Botswana - Source: Citypopulation.de (Projection 2020)
local anchors_BWA 21.09999 26.16427 31.65205 21.05217 19.15932 16.38344 16.3

* Brunei - Source: WUP (Capital City Population)
local anchors_BRN 28.63356 41.87133 42.52289 27.90686 12.4461 7.537559 12.10226

* Cabo Verde - Source: WUP (Capital City Population)
local anchors_CPV 36.83803 39.10209 53.64692 40.18542 40.38074 42.17395 46.05364

* Taiwan - Source: Citypopulation (Estimate 2019)
local anchors_TWN 23.6 24.8 16.4 17.2 15.1 14.5 14.1

* Comoros - Source: Citypopulation.de (Census 2017)
local anchors_COM 35.61948 30.31733 22.53468 23.05279 24.08231 24.14323 29.3

* Fiji - Source: Citypopulation.de (Census 2017)
local anchors_FJI 53.74953 51.3284 52.57967 49.82142 43.29671 38.62054 35.1

* Guyana - Source: CIA Factbook
local anchors_GUY 89.83642 77.3475 66.65032 71.15394 66.77783 61.47829 51

* Mauritius - Source: Citypopulation.de (Estimate 2019)
local anchors_MUS 46.57265 37.77816 35.17447 30.41315 28.47346 27.5233 28.2

* Solomon Islands - Source: Citypopulation.de (Census 2019)
local anchors_SLB 57.53968 80.02645 82.13949 81.90977 78.26442 75.06624 81.3

* Suriname - Source: WUP (Capital City Population)
local anchors_SUR 72.45997 61.49306 67.23786 66.49446 68.46042 66.28619 63.78427

* Swaziland - Source: Keep 2010 constant
local anchors_SWZ 21.92448 32.55598 26.1316 22.49375 27.55555 28.04809 28.04809

* Lesotho - Source: Citypopulation (Census 2016)
local anchors_LSO 34.23761 34.52205 51.32633 49.31438 43.3866 39.10207 49.1

local anchor_codes BHS BLZ BTN BWA BRN CPV TWN COM FJI GUY MUS SLB SUR SWZ LSO

* NOTE(review): only the mid-decade years (1965, 1975, ..., 2015) are reset
* to missing. Values already present in other years (e.g. 1961-1964) are
* left untouched and take part in the later interpolation - confirm that
* this is intended.
foreach iso of local anchor_codes {
	// Decade years receive the listed values, walking 1960 -> 2020
	local yr = 1960
	foreach val of local anchors_`iso' {
		replace pop_largest_city = `val' if code_wb == "`iso'" & year == `yr'
		local yr = `yr' + 10
	}
	// Mid-decade years are blanked so that they can be re-interpolated
	forvalues mid = 1965(10)2015 {
		replace pop_largest_city = . if code_wb == "`iso'" & year == `mid'
	}
}

* Eritrea - Source: WUP
replace pop_largest_city = 42.9 if code_wb == "ERI" & year == 2020

* Interpolate: within each country, fill the remaining missing values that
* lie between observed years by linear interpolation over year
bysort code_wb (year): ipolate pop_largest_city year, generate(ipol_tmp)
replace pop_largest_city = ipol_tmp if pop_largest_city == .
drop ipol_tmp

********************************************************************************
**# Step 3: Finalize and save
********************************************************************************

* Keep the five-year grid 1960, 1965, ..., 2020 and exclude the SSD rows
keep if inlist(year, 1960, 1965, 1970, 1975, 1980, 1985, 1990, 1995, ///
	2000, 2005, 2010, 2015, 2020) & code_wb != "SSD"

* Write the processed dataset, overwriting any previous version
save "processed_datasets/dataset_poplargestcity", replace
